function sim_data = sim_choices(vp,fp,col_long,data)

% This function is called by obj_funct.m in "STEP 2" of the estimation/simulation

% Initialize the simulated data matrix
sim_data = nan(fp.N*(fp.M+1)*fp.R,fp.tot_cols);

% Also load the correct simulated nonlabor income draws:
if fp.doing_bootstrap_se == 1
    NLI_sim = fp.sim_boot_matrix_NLI_3d(:,:,:,fp.b_value); % Note: fp.b_value denotes the current bootstrap loop/sample
else
    NLI_sim = fp.NLI_sim_data_3d; % these are the simulated NLI under the standard estimated NLI parameters, based on the actual data sample (no bootstrap sample)
end

% Load intercept and slope of the latent cognitive capital conversion to test scores (see also paper Appendix D.1.5 for more details)
kappa0 = fp.kappa0;
kappa1 = fp.kappa1;

% Pre-define the regressors for the surface regressions
% Since we have KKxNN_c grid points in the value function, we have KKxNN_c
% observations for our regression, and will have to vectorize.
% Note: Value function  = KK x NN_c
X_0 = ones(fp.KK,fp.NN_c);              % regressor 0: intercept.
X_k = repmat(log(fp.kgrid),1,fp.NN_c);  % regressor 1: cognitive skills log(k). Create NN_C identical columns
X_n = repmat(fp.ngrid',fp.KK,1);        % regressor 2: patience capital n. Create KK identical rows
X_kn = kron(log(fp.kgrid),fp.ngrid');   % regressor 3: log(k)*beta_c. Interaction term
Xmat = [X_0(:) X_k(:) X_n(:) X_kn(:)];  % vectorized versions. This is a K*Z by 4.

% Initial guess for inner solver later on
param_start_untrans = [0.30 0.55];  % loosely based on mean time fractions in the data [taup,tau1]
param_start_trans = log( param_start_untrans ./ (1-param_start_untrans));
par0=param_start_trans; % initiate guess for fminunc, case where CCT=0
par1=param_start_trans; % initiate guess for fminunc, case where CCT=1

% initialize index to fill in sim_data matrix
sim_data_index = 0;
for r = 1:fp.R
    for q = 1:fp.N
        sim_id = r*1000 + q; % don't use i instead of q! We need a unique identifier for every simulated household
        % (Note: if we would have more than 1000 hhs, use r*10000 + q)
        data_here = data(1+(fp.M+1)*(q-1):(fp.M+1)*q, : ); % mother/child i's data
        child_age97 = data_here(1,col_long.child_age97); % this variable is a constant anyway within this data block, so take the first row
        index97 = (data_here(:,col_long.age) == child_age97);
        lw_raw97 = data_here(index97,col_long.lw_raw);

        % Load exogenous household characteristics
        mom_educ = data_here(1,col_long.mom_educ);
        dad_educ = data_here(1,col_long.dad_educ);
        mom_age = data_here(:,col_long.mom_age); % vector of size (fp.M + 1) * 1
        dad_age = data_here(:,col_long.dad_age); % vector of size (fp.M + 1) * 1

        % Technology parameters (conditional on household q's parental education levels)
        vp.delta1 = vp.delta1mat(:,q); % mom time productivity
        vp.delta2 = vp.delta2mat(:,q); % dad time productivity

        vp.beta_p = fp.beta_p_mat(q,r); %parental discount factor
        vp.CCTcost = vp.cost_draws(q,r); % household-level CCT disutility cost

        % initiate psi_{c,M+1} and psi_{p,M+1} given parametric assumptions form expectations for this model
        psi_c_tp1_mat = vp.lambda3 * (1+vp.expect_n_tp1(:,:,end)); % this creates an NN x 2
        psi_p_tp1_mat = ((1-fp.varphi)*vp.alpha4/(1-vp.beta_p)) + fp.varphi*psi_c_tp1_mat;

        zp = NaN(fp.M,4); % pre-allocate regression coefficients for V_p on (log k, beta_c)
        zc = NaN(fp.M,4); % pre-allocate regression coefficients for V_c on (log k, beta_c)
        Vp_mat = nan(fp.KK,fp.NN_c,fp.M); % pre-allocate values for parents
        Vc_mat = nan(fp.KK,fp.NN_c,fp.M); % pre-allocate values for child
        CCT_mat = nan(fp.KK,fp.NN_c,fp.M); % pre-allocate optimal CCT dummy choice

        % Start the backward induction process
        for t = fp.M:-1:child_age97
            vp.w1 = vp.wage_list(q,r,t+1,1); % mom wage offer
            vp.w2 = vp.wage_list(q,r,t+1,2); % dad wage offer
            vp.I = NLI_sim(t+1,q,r);         % non-labor income
            if t < fp.M % for t<M, we need the regression slope coefficients from the previous iteration
                % Note: when regressing on log k and beta_c, we take expectation of next period's patience capital stock n_tp1
                psi_c_tp1_mat = zc(t+1,2) + zc(t+1,4)*vp.expect_n_tp1(:,:,t); % Zx2 matrix
                psi_p_tp1_mat = zp(t+1,2) + zp(t+1,4)*vp.expect_n_tp1(:,:,t); % Zx2 matrix
            end

            for nn = 1:fp.NN_c
                vp.beta_ct = fp.betac_vec(nn); % Note that beta_c = n/(1+n), i.e. child's period t discount factor

                % First, we loop over CCT choice, because conditional on (n_t,CCT_t), all other household choices are independent of k_t
                % 1) Solve for case where CCT = 0
                vp.psi_c_tp1 = psi_c_tp1_mat(nn,1);
                vp.psi_p_tp1 = psi_p_tp1_mat(nn,1);
                Deltac0 = vp.beta_ct*vp.delta3(t)*vp.psi_c_tp1; % useful scalar for case where CCT = 0
                vp.r = 0; % CCT reward elasticity if CCT = 0
                gamma0 = Deltac0/(vp.lambda1+Deltac0);
                [par0,obj_eval_est0,exitflag0] = fminunc(@(param) stack_solver_inner(param,fp,vp,1,gamma0,t), par0,fp.options_solution); % note: initial guess par0 updates in every iteration
                % NOTE: since return_u = 1 here, note that V_p_0 = -obj_eval_est0, but the value function is incomplete (see stack_solver_inner)

                % 2) Solve for case where CCT = 1
                vp.psi_c_tp1 = psi_c_tp1_mat(nn,2);
                vp.psi_p_tp1 = psi_p_tp1_mat(nn,2);
                Deltac1 = vp.beta_ct*vp.delta3(t)*vp.psi_c_tp1; % useful scalar for case CCT = 0:
                Deltap1 = vp.beta_p*vp.delta3(t)*vp.psi_p_tp1;
                gamma1 = Deltap1/(vp.alpha5_tilde + Deltap1); % gives same solution
                [par1,obj_eval_est1,exitflag1] = fminunc(@(param) stack_solver_inner(param,fp,vp,1,gamma1,t), par1,fp.options_solution); % note: initial guess par1 updates in every iteration
                % NOTE: since return_u = 1 here, note that V_p_1 = -obj_eval_est1, but the value function is incomplete (see stack_solver_inner)

                if exitflag1 < 1 || exitflag0 < 1
                    [q r t nn]
                    [par0 par1]
                    [obj_eval_est0 obj_eval_est1]
                    [exitflag0 exitflag1]
                    [psi_c_tp1_mat psi_p_tp1_mat]
                    [vp.w1 vp.w2 vp.I]
                    error('error: exitflag not equal to 1')
                end

                % Conditional on these choices, we solve for the optimal CCT choice, which varies with the initial state k
                for kk = 1:fp.KK
                    k = fp.kgrid(kk); % initial state of cognitive human capital
                    % Now we complete the parents' value function when CCT=0, including all terms relating to log(k) and log(n) from the surface regressions
                    % NOTE: the incomplete value functions were calculated when return_u=1, so have to multiply them by minus 1!
                    if t == fp.M % in the final period, we don't need the regression surface approximations
                        V_pt_0 = -obj_eval_est0 + (vp.alpha4_tilde+vp.beta_p*vp.delta5(t)*psi_p_tp1_mat(nn,1))*log(k);
                        V_pt_1 = -obj_eval_est1 + (vp.alpha4_tilde+vp.beta_p*vp.delta5(t)*psi_p_tp1_mat(nn,2))*log(k);
                    else % for t<M, we need to include all terms relating to log(k) and log(n) from the regression surface approximation (see paper Appendix B)
                        V_pt_0 = -obj_eval_est0 + (vp.alpha4_tilde+vp.beta_p*vp.delta5(t)*psi_p_tp1_mat(nn,1))*log(k) + zp(t+1,1) + zp(t+1,3)*vp.expect_n_tp1(nn,1,t);
                        V_pt_1 = -obj_eval_est1 + (vp.alpha4_tilde+vp.beta_p*vp.delta5(t)*psi_p_tp1_mat(nn,2))*log(k) + zp(t+1,1) + zp(t+1,3)*vp.expect_n_tp1(nn,2,t);
                    end

                    % Finally, we save optimal choices - Comparing difference in value functions to the CCT disutility cost
                    tmpcct = (V_pt_1 - V_pt_0 >= vp.CCTcost); % this defines the optimal CCT 0/1 dummy
                    CCT_mat(kk,nn,t) = tmpcct; % store optimal CCT choice
                    vp.psi_c_tp1 = psi_c_tp1_mat(nn,tmpcct+1);
                    vp.psi_p_tp1 = psi_p_tp1_mat(nn,tmpcct+1);
                    vp.r = tmpcct*(Deltap1-fp.varphi*Deltac1)/(fp.varphi*vp.lambda2); % equal to 0 if CCT = 0, and optimal r* otherwise.
                    Vp_mat(kk,nn,t) = (1-tmpcct)*V_pt_0+tmpcct*(V_pt_1-vp.CCTcost); % save the correct parental value function
                    tmp=stack_solver_inner((1-tmpcct)*par0+tmpcct*par1,fp,vp,0,(1-tmpcct)*gamma0+tmpcct*gamma1,t); % plug in optimal parameters, returns a struct
                    choices(kk,nn,t) = tmp; % this stores the struct in a large matrix
                    if t == fp.M % to complete the child's value function in final period, we don't need regression surface approximation
                        Vc_mat(kk,nn,t) = tmp.V_c + (vp.lambda3+vp.beta_ct*vp.delta5(t)*psi_c_tp1_mat(nn,tmpcct+1))*log(k);
                    else % for t<M, we include all terms from the regression surface approximation (see paper Appendix B)
                        Vc_mat(kk,nn,t) = tmp.V_c + (vp.lambda3+vp.beta_ct*vp.delta5(t)*psi_c_tp1_mat(nn,tmpcct+1))*log(k) + zc(t+1,1) + zc(t+1,3)*vp.expect_n_tp1(nn,tmpcct+1,t);
                    end
                end % kk
            end % nn
            if any(Vp_mat(:) < -10000)
                disp('Warning: some Vp values are below -10000!')
            end

            % Finally, run OLS regressions of V_{c,t} and V_{p,t} on log(k), n and their interaction term
            tmp = Vp_mat(:,:,t); % this is a KK by NN_c matrix, need to vectorize it
            [zp(t,:),~,~,~,stats1] = regress(tmp(:),Xmat); % regression for parents value function
            tmp = Vc_mat(:,:,t); % this is a KK by NN_c matrix, need to vectorize it
            [zc(t,:),~,~,~,stats2] = regress(tmp(:),Xmat); % regression for child's value function

            % Marginal effects as a function of n_t (which is itself a function of n_{t-1} and CCT_{t-1}): dVt / dlog(kt)
            marg_p = zp(t,2)+zp(t,4)*vp.expect_n_tp1(:,:,t-1);
            marg_c = zc(t,2)+zc(t,4)*vp.expect_n_tp1(:,:,t-1);

            if any(marg_c(:)<0) || any(marg_p(:)<0)
                disp('Error: Negative marginal value found!')
                [q r t]
                marg_c
                marg_p
                zp
                zc
                stats1
                stats2
            end
        end % t

        % Now use forward induction to simulate choices in every period

        % Conditional on the household q, we randomly draw an initial test
        % score centered around the true observed initial lw_raw97

        %initial latent k level drawn from beta distribution
        beta_draw = fp.beta_draws_by_measure(q,r,lw_raw97);
        %invert to get latent initial k
        log_k_t = (log(beta_draw) - log(1-beta_draw) - kappa0)/kappa1; % this generally won't lie on the grid
        [~,ixk] = min(abs(log_k_t-log(fp.kgrid))); % initial grid point for k
        k_t_ongrid = fp.kgrid(ixk); % closest grid point to k_t

        ixn = vp.betac_init_draws(q,r,1); % see build_obj.m. This scalar indicates the index of beta_{c,t^h_0} = 1,2,...,Z where we set Z = 3 possible levels of beta_{c,t}

        % Next, start forward simulation given initial draws of (k,n) and (w1,w2,NLI) sequences
        for t = 0:fp.M
            sim_data_index = sim_data_index + 1; % we use this index to fill out the simulated data matrix
            sim_data(sim_data_index,col_long.id) = sim_id;
            sim_data(sim_data_index,col_long.mom_age) = mom_age(t+1); % this is a (fp.M+1)*1 vector
            sim_data(sim_data_index,col_long.dad_age) = dad_age(t+1); % this is a (fp.M+1)*1 vector
            sim_data(sim_data_index,col_long.age) = t;                % need this for moment function
            sim_data(sim_data_index,col_long.mom_educ) = mom_educ;
            sim_data(sim_data_index,col_long.dad_educ) = dad_educ;
            sim_data(sim_data_index,col_long.child_age97) = child_age97; % we need this variable in the moment function
            if t >= child_age97
                choices_here = choices(ixk,ixn,t); % Collect simulated data
                if t == child_age97
                    % in this case, we want to save the actual observed test score, not the transformed and re-transformed
                    % score, in order to cut the simulation noise
                    kt_meas = lw_raw97; % generate simulated measured test score in 1997 = actual observed test score
                elseif t > child_age97
                    temp = exp(kappa0 + kappa1*log(k_t_ongrid)); % use closest value that lies on the k grid
                    p = temp ./ (1+temp);
                    kt_meas = binoinv(fp.score_draws(q,r,t+1),fp.test_max,p); % generate simulated measured test score
                end
                % Save all optimal household choices
                sim_data(sim_data_index,col_long.tau1) = choices_here.tau1;     % mom investment time
                sim_data(sim_data_index,col_long.tau2) = choices_here.tau2;     % dad investment time
                sim_data(sim_data_index,col_long.tau_c) = choices_here.tauc;    % child self-investment time
                sim_data(sim_data_index,col_long.mom_hours) = choices_here.h1;  % mom labor supply
                sim_data(sim_data_index,col_long.dad_hours) = choices_here.h2;  % dad labor supply
                sim_data(sim_data_index,col_long.l1) = choices_here.l1;         % mom leisure
                sim_data(sim_data_index,col_long.l2) = choices_here.l2;         % dad leisure
                sim_data(sim_data_index,col_long.lc) = choices_here.lc;         % child leisure
                sim_data(sim_data_index,col_long.e) = choices_here.e;           % child productive expenditures
                sim_data(sim_data_index,col_long.c) = choices_here.c;           % parental consumption
                sim_data(sim_data_index,col_long.x) = choices_here.x;           % child consumption
                sim_data(sim_data_index,col_long.Y) = choices_here.Y;           % total household income
                sim_data(sim_data_index,col_long.b) = choices_here.b;           % CCT reward function - intercept
                sim_data(sim_data_index,col_long.r) = choices_here.r;           % CCT reward function - slope
                sim_data(sim_data_index,col_long.nonlabor_income) = NLI_sim(t+1,q,r); % nonlabor income
                if choices_here.h1 > 0
                    sim_data(sim_data_index,col_long.mom_wage) = vp.wage_list(q,r,t+1,1); % mom hourly wage (if accepted)
                    % otherwise, mother's wage is censored (just as in the real data)
                end
                if choices_here.h2 > 0
                    sim_data(sim_data_index,col_long.dad_wage) = vp.wage_list(q,r,t+1,2); % dad hourly wage  (if accepted)
                    % otherwise, father's wage is censored (just as in the real data)
                end
                sim_data(sim_data_index,col_long.CCT) = CCT_mat(ixk,ixn,t);     % optimal CCT dummy at age t given initial state (k,beta_c)
                sim_data(sim_data_index,col_long.V_p) = Vp_mat(ixk,ixn,t);      % parents' value function, includes CCT cost if used
                sim_data(sim_data_index,col_long.V_c) = Vc_mat(ixk,ixn,t);      % child's value function
                sim_data(sim_data_index,col_long.k) = k_t_ongrid;               % child's initial stock of cognitive skills (nearest grid point)
                sim_data(sim_data_index,col_long.n) = fp.ngrid(ixn);            % child's initial stock of patience capital
                sim_data(sim_data_index,col_long.lw_raw) = kt_meas;             % child's measured Letter Word score
                sim_data(sim_data_index,col_long.u_p_tilde) = choices_here.u_p_tilde+vp.alpha4_tilde*log(k_t_ongrid) - CCT_mat(ixk,ixn,t) * vp.CCTcost;    % altruistic instantaneous utility of parents
                sim_data(sim_data_index,col_long.u_c) = choices_here.u_c+vp.lambda3*log(k_t_ongrid); % instantaneous utility of child

                % Update endogenous state variables for next iteration
                log_k_tp1 = choices_here.log_k_tp1 + vp.delta5(t)*log(k_t_ongrid); % this is the "exact" k_tp1 conditional on period-t choices, does not necessarily lie on k-grid
                [~,ixk] = min(abs(log_k_tp1-log(fp.kgrid))); % closest grid point for k
                k_t_ongrid = fp.kgrid(ixk); % closest grid point to k_t - this will be used in next iteration to generate kt_meas

                % Now compare the random uniform draw to this vector and choose n_t+1
                tmp_probs = vp.markov_trans(ixn,:,CCT_mat(ixk,ixn,t)+1,t); % load the correct 1x3 Markov probabilities conditional on initial patience stock n_t, optimal chosen CCT_t, and child age t
                tmprnd = fp.n_draws(q,r,t);
                ixn = 1*(tmprnd <= tmp_probs(1)) + 2*(tmprnd > tmp_probs(1) & tmprnd <= sum(tmp_probs(1:2))) + 3*(tmprnd > sum(tmp_probs(1:2))); % NOTE: this only works for case with Z=3 grid points

                % Save optimal next-period state variables given period-t choices
                sim_data(sim_data_index,col_long.k_tp1) = exp(log_k_tp1); % this generally won't lie on the k grid
                sim_data(sim_data_index,col_long.n_tp1) = fp.ngrid(ixn); % save the actual value of n

                if t == fp.M
                    sim_data(sim_data_index,col_long.betac_17) = fp.betac_vec(ixn); % save the actual value of beta_c,17
                    sim_data(sim_data_index,col_long.log_k_17) = log(k_t_ongrid); % save the on-grid value of log_k,17
                    sim_data(sim_data_index,col_long.u_c_17) = vp.lambda3 * log(k_t_ongrid) / (1-fp.betac_vec(ixn)); %vp.psi_c(fp.M+1) * log(k_t_ongrid);
                    sim_data(sim_data_index,col_long.u_p_17) = ((1-fp.varphi)*vp.alpha4/(1-vp.beta_p) + fp.varphi*vp.lambda3/(1-fp.betac_vec(ixn))) * log(k_t_ongrid); %vp.psi_p(fp.M+1) * log(k_t_ongrid); % could use log_k_tp1 for offgrid value
                end
            end
        end % t
    end % q
end % r

end % function